clear
clc
close all

N = 485; % number of locations
S = 11;
T = 9; % Number of periods
first_period = 3; % 1: 1850; 2: 1870, 3: 1890, 4: 1910, 5: 1930, 6: 1950, 7: 1970, 8: 1990, 9: 2010
T_aux = 7; % Number of periods for which we compute the equilibrium

theta = 4.48; % This is actually theta, NOT theta_aux
zeta = 4; % For zeta, use the upper-end value in Allen Donaldson (2020)
betta = 0.5; % Share of young consumption in lifetime utility
delta = 1;

% Moving costs: (one line per origin, one column per destination)
d_loc = load('dsets_for_model\cz_bilateral_distance_wide_formatlab_cpp.csv');
mu_migr_fixed_0_1990 = .7888229; % This comes from the gravity of migration flows
mu_migr_0_1990 = .0028124; % This comes from the gravity of migration flows
mu_migr_fixed_1990 = mu_migr_fixed_0_1990 / zeta;
mu_migr_1990 = mu_migr_0_1990 / zeta;
mu = load_d_migr(d_loc, mu_migr_fixed_1990,mu_migr_1990, N);

data_aggregate_growth = (1.02)^20;

g_star_lambda = (data_aggregate_growth^theta) - 1;

A_o = 1.42*data_aggregate_growth; % Experience premium - this only matters for computing local and aggregate income
% 1.42 is approximately the experience premium in 1990 in Figure 5
% (top-right)in Heathcote et al. RED issue (USA)

% Load data:
data_aux = load('dsets_for_model\cz_decade_population_cpp.csv');
cz_list = data_aux(1:N,1); data_pop = [zeros(N,1) reshape(data_aux(:,3),N,T-1)]; % Note: we are adding the first column as 1950, but it only contains 0's. We don't use population in 1850 anywhere in the calibration
% Adjust data_pop so that drop cannot be below 30% (inconsistent with model)
total_correction = zeros(1,T); % This variable keeps track of how much population is being added in each period
for t=first_period+1:first_period+T_aux-1
    for i=1:N
        if data_pop(i,t) < 0.7*data_pop(i,t-1)
            total_correction(t) = total_correction(t) + (0.7*data_pop(i,t-1) - data_pop(i,t))
            data_pop(i,t) = 0.7*data_pop(i,t-1);
        end
    end
end

data_pop_growth = log(data_pop(:,2:end)) - log(data_pop(:,1:end-1));

data_aux = load('dsets_for_model\patents_data\cz_decade_class_patents_cpp.csv');
data_pat = zeros(N,S,T);
for t=1:T
    data_pat(:,:,t) = data_aux((t-1)*N+1:t*N,3:end);
end
clear data_aux

% Define the outcome variables that we will obtain from the dynamic model:
u_dyn = zeros(N,T);
lambda_dyn = zeros(N,S,T); % This is actually lambda, NOT lambda_aux = lambda^(1/sigma)
pi_dyn = zeros(N,N,S,T);
pop_dyn = zeros(N,T);
L_k_dyn = zeros(N,T);
L_y_dyn = zeros(N,S,T); L_o_dyn = zeros(N,S,T);
prob_orig_given_dest_y_dyn = zeros(N,N,T);
migration_exposure_dyn = zeros(N,N,T);
income_py_dyn = zeros(N,T); income_pc_dyn = zeros(N,T);
aggr_income_py_dyn = zeros(1,T);
fertility_dyn = ones(1,T);
adjustment_factor_dyn = ones(1,T);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% STEP 1: Initial period (BGP) %%%%%%%%%%%%%%%%%%

data_pat_aux = data_pat(:,:,first_period) + data_pat(:,:,first_period-1) + data_pat(:,:,first_period-2);

u_0 = data_pop(:,first_period).^(1/zeta);
u_0 = u_0 / geometric_mean(u_0);
L_y_0 = (1/3)*(1/S)*repmat(data_pop(:,first_period),1,S);

lambda_0 = data_pat_aux ./ L_y_0;
lambda_0 = lambda_0 / mean(mean(lambda_0));

error_0 = 1000;

while error_0 > 0.0001
   
    kernel_utility_0 = u_0.*(lambda_0.^(1/theta)); % The experience premium should appear as A_o^(1-betta) but does not matter, since it appears at the denominator too
    
    pi_0 = zeros(N,N,S);
    
    for j=1:N
        denom = sum(sum((kernel_utility_0./repmat(mu(j,:)',1,S)).^zeta));
        % Probability of going from j to any destination (:):
        pi_0(j,:,:) = ((kernel_utility_0./repmat(mu(j,:)',1,S)).^zeta)./denom;
    end

    error_L_y_0 = 1000;
    while error_L_y_0 > 0.1
       L_y_0_aux = zeros(N,S);
       for j=1:N
           L_y_0_aux = L_y_0_aux + sum(L_y_0(j,:))*squeeze(pi_0(j,:,:));
       end
       error_L_y_0 = max(max(abs(L_y_0 - L_y_0_aux)));
       L_y_0 = L_y_0_aux;
    end
    L_o_0 = L_y_0; L_k_0 = sum(L_y_0,2);
    pop_0 = L_k_0 + sum(L_y_0,2) + sum(L_o_0,2);
    
    lambda_1 = zeros(N,S);
    for i=1:N
        for s=1:S
            if L_y_0(i,s) > 0 && data_pat_aux(i,s) > 0
                lambda_1(i,s) = data_pat_aux(i,s) / L_y_0(i,s);
            end
        end
    end 
    lambda_1 = lambda_1 / mean(mean(lambda_1));
    
    u_1 = ((data_pop(:,first_period)./pop_0).^(1/zeta)).*u_0;
    u_1 = u_1./geometric_mean(u_1);
    
    error_0 = max(abs(u_0 - u_1)) + max(max(abs(lambda_0 - lambda_1)))
    
    lambda_0 = 0.98*lambda_0 + 0.02*lambda_1;
    u_0 = 0.98*u_0 + 0.02*u_1;
    
end
L_k_dyn(:,first_period) = L_k_0; L_y_dyn(:,:,first_period) = L_y_0; L_o_dyn(:,:,first_period) = L_o_0;
income_py_dyn(:,first_period) = sum(L_y_0.*(lambda_dyn(:,:,first_period).^(1/theta)),2) ./ sum(L_y_0,2);
lambda_dyn(:,:,first_period) = lambda_0;
aggr_income_py_dyn(first_period) = sum(sum(L_y_0.*(lambda_dyn(:,:,first_period).^(1/theta)))) / sum(sum(L_y_0));

pop_dyn(:,first_period) = pop_0;
pi_dyn(:,:,:,first_period) = pi_0;
u_dyn(:,first_period) = u_0;

for i=1:N
    for j=1:N
        prob_orig_given_dest_y_dyn(i,j,first_period) = sum(pi_dyn(i,j,:,first_period))*L_k_dyn(i,first_period) / sum(L_y_dyn(j,:,first_period));
    end
end
prob_orig_given_dest_y_dyn(:,:,first_period-1) = prob_orig_given_dest_y_dyn(:,:,first_period);

migration_exposure_dyn(:,:,first_period) = prob_orig_given_dest_y_dyn(:,:,first_period).*(1-eye(N)) ./ sum(prob_orig_given_dest_y_dyn(:,:,first_period).*(1-eye(N)),1);
migration_exposure_dyn(:,:,first_period-1) = migration_exposure_dyn(:,:,first_period);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% STEP 2: Dynamics %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

for t=first_period+1:first_period+T_aux-1

    t
    
    fertility_dyn(t) = (sum(data_pop(:,t)) - sum(L_k_dyn(:,t-1)) - sum(sum(L_y_dyn(:,:,t-1)))) / sum(L_k_dyn(:,t-1));
    adjustment_factor_0 = 1;
    
    data_pat_aux = data_pat(:,:,t);
    
    % Now, we do not know lambda_dyn yet. Assuming betta(BO) -> 1:
    % lambda_dyn = lambda_dyn_t-1 + SUM, where SUM = (Pat/Young)*G_t
    
    L_y_0 = (1/S)*(repmat(sum(L_y_dyn(:,:,t-1),2),1,S))*fertility_dyn(t); % Just a guess
    
    lambda_0 = lambda_dyn(:,:,t-1) + adjustment_factor_0*(data_pat_aux ./ L_y_0);
    
    u_0 = u_dyn(:,t-1);
    u_0 = u_0./geometric_mean(u_0);
    
    error_0 = 1000;
    
    while error_0 > 0.001
        
        kernel_utility_0 = u_0.*(lambda_0.^(1/theta)); % The experience premium should appear as A_o^(1-betta) but does not matter, since it appears at the denominator too
        
        pi_0 = zeros(N,N,S);
    
        for j=1:N
            denom = sum(sum((kernel_utility_0./repmat(mu(j,:)',1,S)).^zeta));
            % Probability of going from j to any destination (:):
            pi_0(j,:,:) = ((kernel_utility_0./repmat(mu(j,:)',1,S)).^zeta)./denom;
        end
    
        L_y_1 = zeros(N,S);
        for j=1:N
            L_y_1 = L_y_1 + L_k_dyn(j,t-1)*squeeze(pi_0(j,:,:));
        end
        L_o_0 = L_y_dyn(:,:,t-1); L_k_0 = fertility_dyn(t)*sum(L_y_1,2);
        pop_0 = L_k_0 + sum(L_y_1,2) + sum(L_o_0,2);
        
        income_py_dyn(:,t) = sum(L_y_1.*(lambda_0.^(1/theta)),2) ./ sum(L_y_1,2);
        aggr_income_py_dyn(t) = sum(sum(L_y_1.*(lambda_0.^(1/theta)))) / sum(sum(L_y_1));
  
        aggregate_growth = aggr_income_py_dyn(1,t) / aggr_income_py_dyn(1,t-1);
        
        adjustment_factor_dyn(t) = (data_aggregate_growth / aggregate_growth)*adjustment_factor_0;
        
        u_1 = ((data_pop(:,t)./pop_0).^(1/zeta)).*u_0;
        u_1 = u_1./geometric_mean(u_1);
        
        error_0 = abs(adjustment_factor_0 - adjustment_factor_dyn(t)) + max(abs(u_0 - u_1)) + max(max(L_y_0 - L_y_1))

        adjustment_factor_0 = 0.98*adjustment_factor_0 + 0.02*adjustment_factor_dyn(t);
        
        L_y_0 = L_y_1;
        
        lambda_0 = zeros(N,S);
        for i=1:N
            for s=1:S
                if L_y_0(i,s) > 0
                    lambda_0(i,s) = delta*lambda_dyn(i,s,t-1) + adjustment_factor_dyn(t)*(data_pat_aux(i,s) / L_y_0(i,s));
                end
            end
        end
        
        u_0 = 0.98*u_0 + 0.02*u_1;
        
    end

    L_k_dyn(:,t) = L_k_0; L_y_dyn(:,:,t) = L_y_0; L_o_dyn(:,:,t) = L_o_0;
    pop_dyn(:,t) = pop_0;
    pi_dyn(:,:,:,t) = pi_0;
    u_dyn(:,t) = u_0;
    lambda_dyn(:,:,t) = lambda_0;
    income_pc_dyn(:,t) = (income_py_dyn(:,t).*sum(L_y_dyn(:,:,t),2) + A_o*income_py_dyn(:,t-1).*sum(L_o_dyn(:,:,t),2)) ./ (L_k_dyn(:,t) + sum(L_y_dyn(:,:,t),2) + sum(L_o_dyn(:,:,t),2));
    
    for i=1:N
        for j=1:N
            prob_orig_given_dest_y_dyn(i,j,t) = sum(pi_dyn(i,j,:,t))*L_k_dyn(i,t-1) / sum(L_y_dyn(j,:,t));
        end
    end
    migration_exposure_dyn(:,:,t) = prob_orig_given_dest_y_dyn(:,:,t).*(1-eye(N)) ./ sum(prob_orig_given_dest_y_dyn(:,:,t).*(1-eye(N)),1);

end

pop_growth_dyn = log(pop_dyn(:,2:end)) - log(pop_dyn(:,1:end-1));

% Weighted standard deviation in 1990 (in the data, 0.2144793
ave_log_income_1990 = (1/sum(pop_dyn(:,8)))*sum(log(income_pc_dyn(:,8)).*pop_dyn(:,8));
sqrt((1/sum(pop_dyn(:,8)))*sum(((log(income_pc_dyn(:,8)) - ave_log_income_1990).^2).*pop_dyn(:,8)))

lambda_dyn(:,:,first_period-1) = lambda_dyn(:,:,first_period) .* (data_aggregate_growth.^-theta);
pi_dyn(:,:,:,first_period-1) = pi_dyn(:,:,:,first_period);

csvwrite('results_model\log_income_pc_1950_forstata.csv',[cz_list log(income_pc_dyn(:,6))])
csvwrite('results_model\log_income_pc_1970_forstata.csv',[cz_list log(income_pc_dyn(:,7))])

csvwrite('results_model\log_income_pc_1990_forstata.csv',[cz_list log(income_pc_dyn(:,8))])
csvwrite('results_model\log_income_pc_2010_forstata.csv',[cz_list log(income_pc_dyn(:,9))])

%%% Income pc by sector in 1990:

income_py_bysector_1990 = zeros(N,S);
for i=1:N
    for s=1:S
        income_py_bysector_1990(i,s) = lambda_dyn(i,s,8).^(1/theta);
    end
end
csvwrite('results_model\income_py_bysector_1990_forstata.csv',[cz_list income_py_bysector_1990])

income_py_bysector_2010 = zeros(N,S);
for i=1:N
    for s=1:S
        income_py_bysector_2010(i,s) = lambda_dyn(i,s,9).^(1/theta);
    end
end
csvwrite('results_model\income_py_bysector_2010_forstata.csv',[cz_list income_py_bysector_2010])

% Set omega as in Eckert and Peters (rho in Table 3, Feb 2023 version):
omega = -0.15;
ubar_dyn = u_dyn.*(pop_dyn.^-omega);

prob_dest_given_origin_1990 = zeros(N,N);
for i=1:N
    for j=1:N
        prob_dest_given_origin_1990(i,j) = sum(pi_dyn(i,j,:,8));
    end
end
prob_dest_given_origin_1990 = prob_dest_given_origin_1990'; prob_dest_given_origin_1990 = prob_dest_given_origin_1990(:);
cz_origin_aux = repmat(cz_list',N,1); cz_origin_aux = cz_origin_aux(:);
cz_destination_aux = repmat(cz_list,N,1);
for_stata = [cz_origin_aux cz_destination_aux prob_dest_given_origin_1990];
csvwrite('results_model\prob_dest_given_origin_1990.csv',for_stata)

% save('calibr.mat')
